# Load the business licenses table from the cached RDS file in data/.
# The original CSV import is kept below for reference.
# bus_licenses <- read_csv(here::here("data", "Business_Licenses.csv"))
bus_licenses <- readRDS(file = here::here("data", "bus_licenses.Rda"))


# view problems report
# problems(bus_licenses)


# investigate nonstandard zip codes flagged -- looks like int'l businesses operating in Chicago
# opting not to remove
# bind_cols(bus_licenses[unlist(problems(bus_licenses)[, 'row']), c('LICENSE ID', 'LEGAL NAME', 'DOING BUSINESS AS NAME', 'ADDRESS', 'CITY', 'STATE', 'BUSINESS ACTIVITY')], problems(bus_licenses)[, 'actual']) %>% arrange(ADDRESS) %>% distinct(`DOING BUSINESS AS NAME`, `BUSINESS ACTIVITY`, ADDRESS, CITY, STATE, actual)

In the last decade, underrepresented minority populations in Chicago have been increasingly relegated to lower-income areas in the South and West of the City.

# Load the projected tract-level demographics and the 2015 ward boundaries.
# all_demos <-  readRDS(here::here("data", "all_demos_chi.Rda"))
all_demos.Chi <- readRDS(here::here("data", "all_demos_chi_proj.Rda"))
wards.2015 <- readRDS(here::here("data", "wards2015_sf.Rda"))


# Plot the intersection of Census tracts with Chicago wards, faceted by `id`.
# Tracts with no predominant race recorded are dropped before plotting.
ggplot(st_intersection(all_demos.Chi, wards.2015) %>% filter(!is.na(predominant_race))) +

  # color based on predominant race based on Census, shaded by percentage of that race
  geom_sf(aes(fill = predominant_race, alpha = max_pct), lwd = 0) +
  scale_fill_dt("diverging", reverse = TRUE) +
  scale_alpha(range = c(0.35, 0.9), guide = "none") +

  # outline Chicago wards over data
  geom_sf(data = wards.2015, color = "black", fill = NA) +

  # label wards with the most movement; the three calls differ only in which
  # wards they label and in how the labels are nudged/justified
  geom_text_repel(
    data = wards.2015 %>% filter(ward %in% c(10, 18)),
    aes(x = long, y = lat, label = ward),
    fontface = "bold",
    force = 5,
    size = 6,
    direction = "both",
    hjust = 0
  ) +
  geom_text_repel(
    data = wards.2015 %>% filter(ward %in% c(9, 13, 14, 33, 34)),
    aes(x = long, y = lat, label = ward),
    nudge_x = -.35,
    segment.size = 0.5,
    segment.color = get_dt_cols("cocoa"),
    fontface = "bold",
    size = 6,
    force = 5,
    direction = "both",
    hjust = 0
  ) +
  geom_text_repel(
    data = wards.2015 %>% filter(ward %in% c(11)),
    aes(x = long, y = lat, label = ward),
    nudge_x = .15,
    segment.size = 0.5,
    segment.color = get_dt_cols("cocoa"),
    fontface = "bold",
    size = 6,
    force = 10,
    direction = "both",
    hjust = 1
  ) +
  coord_sf(datum = NA) +
  theme_map_modest() +
  theme(
    plot.margin = unit(c(20, 0, 0, 0), "pt"),
    legend.title = element_text(size = 15),
    legend.text = element_text(size = 12),
    # hjust must be numeric (0-1); it was previously passed as the string "0.5"
    plot.title = element_text(size = 20, face = "bold", hjust = 0.5, margin = margin(t = 15)),
    plot.subtitle = element_text(size = 15, margin = margin(t = 15)),
    plot.caption = element_text(size = 15)
  ) +

  # plot each year separately
  facet_wrap(~ id) +
  labs(
    title = "Latinx Populations in Chicago\nPushed to South, West Neighborhoods",
    subtitle = "Chicago Racial and Ethnic Group Movement\nby Census Tract Since 2012 (5 year averages)",
    caption = "Source: U.S. Census Bureau",
    fill = "Predominant Race in Tract"
  )

Business Activity by Chicago Side Over Time

# Load the per-ward license counts by date.
output_j <- readRDS(here::here("data", "j_all_wards_all_dates.Rda"))

# Build every SIDE x WARD x APPLICATION TYPE x count_date combination, join the
# observed rows back on, and tag each date with the start of the week
# (start.on.monday = FALSE), month and quarter that contains it.
allDatesCount.df <- output_j %>%
  # namespaced so a same-named function from another attached package cannot mask it
  tidyr::expand(SIDE, WARD, `APPLICATION TYPE`, count_date) %>%
  # join keys are stated explicitly instead of relying on the implicit natural
  # join (which reported exactly these four columns)
  full_join(output_j, by = c("SIDE", "WARD", "APPLICATION TYPE", "count_date")) %>%
  arrange(SIDE, WARD, `APPLICATION TYPE`, count_date) %>%
  mutate(
    activity_wk = lubridate::as_date(
      cut(count_date, breaks = "week", start.on.monday = FALSE, origin = lubridate::origin)
    ),
    activity_month = lubridate::as_date(
      cut(count_date, breaks = "month", start.on.monday = FALSE, origin = lubridate::origin)
    ),
    activity_qtr = lubridate::as_date(
      cut(count_date, breaks = "quarter", start.on.monday = FALSE, origin = lubridate::origin)
    )
  )

# Stacked bars of issued/renewed license counts per quarter, one segment per Side.
allDatesCount.df %>%
  filter(`APPLICATION TYPE` %in% c("ISSUE", "RENEW")) %>%
  group_by(SIDE, activity_qtr) %>%
  # Combinations created by expand() that have no observed row carry NA after
  # the join; na.rm keeps one such row from turning a whole Side/quarter total
  # into NA (and its bar segment from being dropped).
  summarise(active_businesses = sum(active_businesses, na.rm = TRUE)) %>%
  arrange(activity_qtr, desc(active_businesses)) %>%
  ggplot(aes(x = activity_qtr, y = active_businesses, group = SIDE)) +
  geom_col(aes(fill = SIDE)) +
  scale_fill_dt("main") +
  # print each segment's total in the middle of the segment
  geom_text(aes(label = active_businesses), size = 4.5, position = position_stack(vjust = 0.5), color = "white") +
  scale_y_continuous(labels = scales::comma) +
  scale_x_date(
    date_labels = "%b %y",
    date_breaks = "1 year",
    limits = c(ymd("2012-01-01"), ymd("2018-12-31"))
  ) +
  theme_modest() +
  theme(
    legend.position = c(0.5, 0.9),
    legend.direction = "horizontal",
    axis.title.x = element_blank(),
    plot.margin = unit(c(2, 2, 2, 2), "cm")
  ) +
  labs(
    y = "Number of Business Licenses Issued or Renewed",
    caption = "Data Source: Chicago Open Data Portal",
    title = "Business Activity Stagnant in Northwest, Far Southwest, Far Southeast",
    subtitle = "Minimal New and Renewing Businesses Since 2012 in Three Chicago Areas",
    fill = "Chicago Area"
  )

Mapping Chicago’s Forgotten Economies

register_google(key = Sys.getenv("GOOGLEMAPS_KEY"))


# read in Chicago ward geos in ggmap-compatible format:
# shapefile -> one row per boundary vertex (fortify), with the ward attributes
# merged back on through a row-name based `id`
wards.shp.2015 <- here::here("data", "Geofiles - Chicago Zip Code and Neighborhood", "Boundaries - Wards (2015-)", "geo_export_0bb2e9fd-20ca-415b-a96a-7722d72c1b41.shp")
wards2015 <- shapefile(wards.shp.2015)
wards2015@data <- mutate(wards2015@data, id = rownames(wards2015@data))
wards2015.points <- fortify(wards2015, region = "id")
wards2015.df <- merge(wards2015.points, wards2015@data, by = "id")



# name license types (used as facet labels below)
license_types <- c("ISSUE" = "New Business Licenses",
                   "RENEW" = "Business License Renewals")

# filter for 2012+ active issuances and renewals
# (count each business only once per year)
bl <- bus_licenses %>%
  filter(activity_yr >= 2012, active == 1, `APPLICATION TYPE` %in% c("ISSUE", "RENEW")) %>%
  distinct(`LICENSE ID`, activity_yr, `APPLICATION TYPE`, LONGITUDE, LATITUDE)

# pull in Chicago terrain map from Google Maps
ggmap::ggmap(ggmap::get_googlemap(center = c(lon = -87.732125, lat = 41.83379),
                                  zoom = 10, scale = 1,
                                  maptype = "terrain",
                                  color = "color",
                                  key = Sys.getenv("GOOGLEMAPS_KEY"))) +

  # layer licenses over each other at extremely low alpha
  geom_point(data = bl, aes(x = LONGITUDE, y = LATITUDE, color = as.factor(activity_yr)),
             alpha = 0.05, show.legend = FALSE, na.rm = TRUE) +

  # add Chicago Ward boundaries
  geom_path(data = wards2015.df, aes(long, lat, group = group), color = "black") +
  geom_polygon(data = wards2015.df, aes(long, lat, group = group), fill = NA) +
  scale_color_dt("desert") +
  # NOTE(review): the subtitle says "since 2009" while `bl` is filtered to
  # activity_yr >= 2012 -- confirm which year is intended.
  labs(y = "Latitude", x = "Longitude", colour = "Year", title = "Money in the Middle",
       subtitle = "Virtually No New Business Entry, Renewal in Chicago Wards 9, 10, 18, or 41 since\n2009",
       caption = "Data Source: City of Chicago Department of Business Affairs and Consumer Protection"
  ) +
  theme_modest() +
  # one panel per application type, titled via license_types
  facet_grid(. ~ `APPLICATION TYPE`, labeller = as_labeller(license_types)) +
  theme(
    panel.border = element_blank(),
    axis.text.y = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    axis.title = element_blank(),
    axis.text.x = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    # hjust must be numeric (0-1); it was previously passed as the string "0.5"
    plot.title = element_text(face = "bold", hjust = 0.5, margin = margin(t = 15)),
    plot.subtitle = element_text(hjust = 0.5, margin = margin(t = 20))
  ) +

  # zoom graph to outer ward boundaries: 0.1 degrees of padding below the
  # minimum and 0.05 above the maximum on both axes. The y limits previously
  # had the "+ 0.05" outside c(), which shifted BOTH limits by 0.05.
  scale_x_continuous(limits = c(min(wards2015.df$long) - 0.1, max(wards2015.df$long) + 0.05), expand = c(0, 0)) +
  scale_y_continuous(limits = c(min(wards2015.df$lat) - 0.1, max(wards2015.df$lat) + 0.05), expand = c(0, 0))

# Count active license records per ward and month for activity after
# 1 Jan 2012, then draw one boxplot of those monthly counts per ward.
# NOTE(review): no APPLICATION TYPE filter is applied here, so every active
# record is counted, not only issuances and renewals -- confirm intent.
bus_licenses %>%
  filter(!is.na(WARD), active == 1, activity_date > mdy("1/1/2012")) %>%
  group_by(activity_month, activity_yr, WARD, SIDE) %>%
  summarise(business_count = n()) %>%
  group_by(WARD) %>%
  arrange(desc(business_count)) %>%

  # order wards along the x-axis by their median monthly count
  ggplot(aes(x = reorder(WARD, business_count, FUN = median), y = business_count)) +

  # color by Chicago Side
  geom_boxplot(aes(group = as.factor(WARD), fill = as.factor(SIDE)), alpha = 0.5) +
  scale_fill_dt("diverging") +

  # label each box by corresponding Ward, 50 units above its median.
  # The medians are derived from the plot's own data (layer data given as a
  # function of the plot data) so the labels always match the boxes drawn;
  # they were previously recomputed without the 2012 date filter.
  geom_text(
    data = function(monthly_counts) {
      monthly_counts %>%
        group_by(WARD) %>%
        summarise(median_issuances = median(business_count)) %>%
        arrange(median_issuances)
    },
    aes(x = as.factor(WARD), y = median_issuances + 50, group = as.factor(WARD),
        label = as.factor(WARD)),
    color = "#6E2C49", fontface = "bold", size = 6
  ) +

  # limit the visible range to 750 (one Loop ward's outliers extend ~1000 above
  # other wards). coord_cartesian zooms without discarding data; limits set on
  # the y scale would remove values above 750 before the boxplot statistics are
  # computed and so change the boxes themselves.
  coord_cartesian(ylim = c(0, 750)) +

  # NOTE(review): the title says "15+ Years" while the data above is limited to
  # activity after 2012-01-01 -- confirm the wording.
  labs(
    x = "Chicago Council Ward",
    y = "Average Monthly Business Count",
    caption = "Data Source: Chicago Open Data Portal",
    title = "Least New & Surviving Businesses\nin South, West Wards for 15+ Years",
    subtitle = "Calumet Heights, Westlawn Average less than 25 Monthly\nBusiness License Issuances",
    fill = "Chicago Area"
  ) +
  theme_modest() +
  theme(
    plot.margin = unit(c(20, 0, 0, 0), "pt"),
    panel.grid.major.x = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_blank(),
    # hjust must be numeric (0-1); it was previously passed as the string "0.5"
    plot.title = element_text(face = "bold", hjust = 0.5, margin = margin(t = 15)),
    plot.subtitle = element_text(margin = margin(t = 15))
  )

# Load the turnout tables: one summarised by Chicago Side, one by ward.
turnoutSides <- readRDS(file = here::here("data", "sides_turnout.Rda"))
turnoutDiff <- readRDS(file = here::here("data", "wards_turnout.Rda"))

# Slope chart of turnout by election year, one line per Chicago Side.
ggplot(
  data = turnoutSides,
  mapping = aes(x = as.factor(YEAR), y = TURNOUT, group = as.factor(SIDE))
) +

  # Side-level lines; the constant grey colour overrides the mapped colour
  geom_line(
    mapping = aes(colour = SIDE),
    colour = "grey",
    alpha = 0.75,
    size = 1.5,
    show.legend = FALSE
  ) +
  # geom_line(data = filter(turnoutSides, (DIFFERENCE > -0)||(is.na(DIFFERENCE))), aes(colour=SIDE), size=1.5, show.legend=FALSE) +

  # thin, faint line per ward, coloured by Side, to show the spread of wards
  # within each Side
  geom_line(
    data = turnoutDiff,
    mapping = aes(group = as.factor(WARD), color = SIDE),
    alpha = 0.25,
    size = 0.5,
    show.legend = FALSE
  ) +

  # print the mean ward turnout (rounded to 1 decimal, with a % sign) for each
  # Side at each election year
  geom_label(
    data = turnoutDiff %>%
      group_by(YEAR, SIDE) %>%
      summarise(MEAN_TURNOUT = round(mean(TURNOUT, na.rm = TRUE), 1)),
    mapping = aes(
      x = as.factor(YEAR),
      y = MEAN_TURNOUT,
      label = paste0(MEAN_TURNOUT, "%"),
      group = as.factor(SIDE)
    ),
    size = 4,
    fontface = "bold",
    color = "darkgray",
    label.size = 0.0,
    label.padding = unit(0.05, "lines")
  ) +

  # Side names next to the 2011 end of each line, nudged left
  geom_label_repel(
    data = turnoutSides %>% filter(YEAR == 2011),
    mapping = aes(label = paste0(SIDE), color = SIDE),
    fill = NA,
    size = 5,
    fontface = "bold",
    hjust = "left",
    nudge_x = -.25,
    direction = "both",
    force = 5,
    point.padding = 3,
    show.legend = FALSE
  ) +

  # Side names next to the 2015 end of each line, nudged right
  geom_label_repel(
    data = turnoutSides %>% filter(YEAR == 2015),
    mapping = aes(label = paste0(SIDE), color = SIDE),
    fill = NA,
    size = 5,
    fontface = "bold",
    hjust = "right",
    nudge_x = .25,
    direction = "both",
    force = 7.5,
    point.padding = 3,
    show.legend = FALSE
  ) +

  # colour palette, and year labels moved to the top of the panel
  scale_color_dt("mixed", reverse = TRUE) +
  scale_x_discrete(position = "top") +
  # coord_cartesian(ylim=c(23.5, 60)) +

  # strip everything except the year labels on the top axis
  theme_modest() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.text.x.top = element_text(size = rel(1.25), vjust = -8, face = "bold")
  ) +
  labs(
    title = "Highest Voter Turnout In Wards Facing Deinvestment",
    subtitle = "Change in Voter Turnout Between 2011 and 2015 Chicago City Council Elections",
    caption = "Source: Chicago Board of Election Commissioners",
    color = "Chicago Area"
  )